library(tidyverse)
library(broom)
library(ggfortify)
library(jsonlite)
library(knitr)
library(scales)
library(kableExtra)
serversFromConfig <- function(configFile = "../config.json") {
fromJSON(configFile) |>
as_tibble() |>
select(contains("dl")) |>
mutate(server = str_c("Server ", 1:3), .before = 1) |>
rename_with(\(x) str_remove(x, "_dl_servers"), !server) |>
pivot_longer(!server, names_to = "platform", values_to = "ip") |>
mutate(platform = case_match(
platform,
"swarm" ~ "Swarm",
"ipfs" ~ "IPFS",
"arw" ~ "Arweave"
))
}
dataFromJsonRaw <- function(jsonFile = "../results.json") {
jsonlite::fromJSON(jsonFile) |>
as_tibble() |>
unnest(tests) |>
unnest(results) |>
rename(time_sec = download_time_seconds,
replicate = ref,
platform = storage)
}
dataFromJson <- function(rawTable) {
rawTable |>
mutate(sha256_match = (sha256_match == "true")) |>
mutate(platform = ifelse(platform == "Ipfs", "IPFS", platform)) |>
mutate(size_kb = as.integer(size)) |>
select(!size & !server & !timestamp) |>
left_join(serversFromConfig(), by = join_by(platform, ip)) |>
relocate(size_kb, server, time_sec, attempts, sha256_match,
.after = platform)
}Analysis of first run of the benchmarking experiment
Loading and tidying the data
We first set up some functions to load and tidy the raw data:
After loading and tidying the data, here’s what the first few rows of the table look like:
dat <- dataFromJson(dataFromJsonRaw())
dat |>
head(n = 10) |>
kable()| platform | size_kb | server | time_sec | attempts | sha256_match | ip | latitude | longitude | replicate |
|---|---|---|---|---|---|---|---|---|---|
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 2 | 0.1379130 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 3 | 0.1654890 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 8390191395cd33a3c7f3a63824d484d6f5666766516068daffc81aa1ab583c27 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 2 | 0.3394287 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 3 | 0.1937695 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 6e0c819f68bbf512dbcb4a5d2d696e5347b8dafab7e97df7223db0ada69344d7 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 2 | 0.3230913 | 1 | TRUE | 188.245.154.61:1633 | 49.4542 | 11.0775 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 3 | 0.3515806 | 1 | TRUE | 188.245.177.151:1633 | 49.4542 | 11.0775 | 36e1e9345d559b6affece4568949f6ff2e6beb3b1db80ae5ebbcf3a74f0c5e56 |
| Swarm | 1 | Server 1 | 0.0000000 | 15 | FALSE | 5.9.50.180:8080 | 50.4779 | 12.3713 | 7049fe5e08fd855c3b89788a317f51bf844c20ecf3c5f71f863d8a7c9ed2af0d |
We can do some sanity checks. First of all, the experiment is well balanced, with 30 replicates per size, server, and platform:
dat |>
count(size_kb, server, platform, name = "number of replicates") |>
count(`number of replicates`,
name = "size-server-platform combinations") |>
kable()| number of replicates | size-server-platform combinations |
|---|---|
| 30 | 45 |
And the replicates are also correctly assigned:
dat |>
count(server, replicate, name = "number of replicates") |>
count(`number of replicates`,
name = "server-replicate combinations") |>
kable()| number of replicates | server-replicate combinations |
|---|---|
| 1 | 1350 |
Let us check if any of the sha256 matches failed:
dat |>
count(sha256_match) |>
kable()| sha256_match | n |
|---|---|
| FALSE | 150 |
| TRUE | 1200 |
Indeed, there are 150 failures. Let us check where those failed attempts are:
dat |>
filter(!sha256_match) |>
count(platform, size_kb, server) |>
kable()| platform | size_kb | server | n |
|---|---|---|---|
| Swarm | 1 | Server 1 | 30 |
| Swarm | 10 | Server 1 | 30 |
| Swarm | 100 | Server 1 | 30 |
| Swarm | 1000 | Server 1 | 30 |
| Swarm | 10000 | Server 1 | 30 |
In short, all Swarm downloads on Server 1 have failed, and nothing else.
Those same failed downloads also always had 15 download attempts. All other downloads succeeded in a single attempt:
dat |>
count(platform, attempts, server) |>
pivot_wider(names_from = platform, values_from = attempts) |>
relocate(Swarm, IPFS, Arweave, .after = n) |>
kable()| server | n | Swarm | IPFS | Arweave |
|---|---|---|---|---|
| Server 1 | 150 | 15 | 1 | 1 |
| Server 2 | 150 | 1 | 1 | 1 |
| Server 3 | 150 | 1 | 1 | 1 |
So everything in the data look OK at first glance except for the (Swarm, Server 1) combination.
Preliminary analysis
Plotting the raw results, we get:
dat |>
filter(sha256_match) |>
select(platform | size_kb | server | time_sec) |>
mutate(platform = fct_reorder(platform, time_sec)) |>
mutate(size = case_when(
size_kb == 1 ~ "1 KB",
size_kb == 10 ~ "10 KB",
size_kb == 100 ~ "100 KB",
size_kb == 1000 ~ "1 MB",
size_kb == 10000 ~ "10 MB"
)) |>
mutate(size = fct_reorder(size, size_kb)) |>
ggplot(aes(x = time_sec, color = platform, fill = platform)) +
geom_density(alpha = 0.2, bw = 0.05) +
scale_x_log10() +
labs(x = "Retrieval time (seconds)", y = "Density",
color = "Platform: ", fill = "Platform: ") +
scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
scale_fill_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
facet_grid(server ~ size, scales = "fixed") +
theme_bw() +
theme(legend.position = "bottom", panel.grid = element_blank())Here we have retrieval times (on the log scale) along the x-axis and density of incidence along the y-axis. The curves are higher where there are more data. Colors represent the different storage platforms; facet rows are the different servers used, and facet columns are the various data sizes.
At a glance, we see that IPFS is the fastest. For small files, Swarm is faster than Arweave. For 10MB files, it is a bit slower but still comparable. Somewhat strangely, the Swarm distributions look bimodal, even on Server 2 and Server 3 where the downloads succeeded. This should probably be investigated further.
Now we check the relationship between file size and download times, for each unique platform-server combination (removing the faulty (Swarm, Server 1) data, of course):
mergePlatformServer <- function(dat) {
dat |>
# Make sure platform-server combinations can be properly sorted:
mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
arrange(platform, server, size_kb) |>
# Merge platform-server combinations, for plotting purposes:
mutate(plat_serv = as_factor(str_c(platform, ", ", server)))
}
plotPlatformServerFit <- function(dat, x, y, formula = y ~ x, method = lm,
log_y = TRUE) {
ggplot(dat, aes(x = {{x}}, y = {{y}})) +
geom_point(color = "steelblue", alpha = 0.5) +
geom_smooth(method = method, color = "goldenrod", fill = "goldenrod",
formula = formula) +
scale_x_log10() +
{ if (log_y) scale_y_log10() else scale_y_continuous() } +
labs(x = "File size (KB)", y = "Download time (seconds)") +
facet_wrap(~ plat_serv, scales = "free_y") +
theme_bw()
}
dat |>
mergePlatformServer() |>
# Remove faulty data points, replacing the download times of 0 with NA:
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, log_y = FALSE)These data have a clear increasing trend. They are also manifestly nonlinear, so fitting linear functions is unlikely to do well. Instead, let us try to fix this by putting download times on the log scale as well:
dat |>
mergePlatformServer() |>
# Remove faulty data points, replacing the download times of 0 with NA:
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, log_y = TRUE)This is a lot better, although arguably the relationships are still somewhat nonlinear. That said, let us analyze this pattern further by performing a linear regression for each platform-server combination:
regressionDat <- dat |>
filter(sha256_match) |>
mutate(predictor = log10(size_kb), response = log10(time_sec)) |>
nest(data = !platform & !server) |>
mutate(fit = map(data, \(dat) lm(response ~ predictor, data = dat))) |>
mutate(regtab = map(fit, broom::tidy)) |>
unnest(regtab)Then we can inspect the regression statistics both for the intercepts and the slopes:
regressionDat |>
select(!data & !fit) |>
mutate(term = ifelse(term == "(Intercept)", "intercept", "slope")) |>
arrange(term, platform, server) |>
kable()| platform | server | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| Arweave | Server 1 | intercept | 0.0125603 | 0.0105703 | 1.188266 | 0.236632 |
| Arweave | Server 2 | intercept | 0.1716536 | 0.0056452 | 30.406813 | 0.000000 |
| Arweave | Server 3 | intercept | 0.2216665 | 0.0122245 | 18.133022 | 0.000000 |
| IPFS | Server 1 | intercept | -0.7221360 | 0.0308886 | -23.378736 | 0.000000 |
| IPFS | Server 2 | intercept | -1.0486442 | 0.0318166 | -32.959038 | 0.000000 |
| IPFS | Server 3 | intercept | -1.2726393 | 0.0126825 | -100.346280 | 0.000000 |
| Swarm | Server 2 | intercept | -0.6917008 | 0.0433090 | -15.971303 | 0.000000 |
| Swarm | Server 3 | intercept | -0.7425320 | 0.0470355 | -15.786638 | 0.000000 |
| Arweave | Server 1 | slope | 0.0972232 | 0.0043153 | 22.529853 | 0.000000 |
| Arweave | Server 2 | slope | 0.0546925 | 0.0023047 | 23.731274 | 0.000000 |
| Arweave | Server 3 | slope | 0.0700226 | 0.0049906 | 14.030858 | 0.000000 |
| IPFS | Server 1 | slope | 0.2271200 | 0.0126102 | 18.010803 | 0.000000 |
| IPFS | Server 2 | slope | 0.1642424 | 0.0129891 | 12.644666 | 0.000000 |
| IPFS | Server 3 | slope | 0.2057088 | 0.0051776 | 39.730539 | 0.000000 |
| Swarm | Server 2 | slope | 0.3540662 | 0.0176808 | 20.025443 | 0.000000 |
| Swarm | Server 3 | slope | 0.3630591 | 0.0192022 | 18.907212 | 0.000000 |
All parameters are significantly different from zero except for the intercept of Arweave on Server 1. To check how well the assumptions behind linear regression are fulfilled (and thus how much one can trust these results), we make diagnostic plots:
regressionDat |>
filter(term != "(Intercept)") |>
mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
arrange(platform, server) |>
mutate(diagnostics = map(fit, \(x) {
autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
theme_bw()
} )) |>
mutate(diagnostics = pmap(list(diagnostics, platform, server), \(dia, sto, se) {
gridExtra::grid.arrange(grobs = dia@plots, top = str_c(sto, ", ", se))
} )) |>
suppressMessages() |>
capture.output() |>
invisible()Most of these diagnostics look acceptable, although the residuals clearly do depend on fitted values quite often. This is the case for both Server 2 and Server 3 on Swarm. The same is true for IPFS Server 2 and Arweave Server 1. In both cases, there are additionally large outliers distorting the results. In the other cases, the diagnostics look fine.
Building a predictive model
The above linear model convincingly establishes a positive relationship between log file size and log download times that is not simply due to chance. But it might not be the best model for prediction, because the relationship is manifestly nonlinear, yet the fitted curve was is linear function.
One possible improvement is to try fitting a quadratic or cubic curve. In fact, a suite of models will be fit below, and we will employ model selection to try and choose the best. The models are:
- Quadratic (
quad), with the formula \(y_i = \beta_0 + \beta_1 x_i + \beta_2 x_i^2 + \varepsilon_i\). Here \(y_i\) is the \(i\)th log download time (all logarithms are base 10), \(x_i\) is the \(i\)th log file size, the \(\beta_i\) are the regression coefficients, and \(\varepsilon_i\) is the residual variation. - Quadratic, but without the linear term (
quadsimp). It is the same asquadbut with \(\beta_1\) set to zero: \(y_i = \beta_0 + \beta_1 x_i^2 + \varepsilon_i\). This model is therefore less flexible, but also has fewer parameters which reduces the possibility of overfitting. - Exponential (
exp), with \(y_i = \beta_0 + \beta_1 \exp(x_i) + \varepsilon_i\)
Visually, all these models fit the data reasonably:
plotWithFormula <- function(dat, formula = y ~ x) {
dat |>
mergePlatformServer() |>
mutate(time_sec = ifelse(!sha256_match, NA, time_sec)) |>
plotPlatformServerFit(size_kb, time_sec, formula = formula)
}
plotWithFormula(dat, y ~ x + I(x^2))plotWithFormula(dat, y ~ I(x^2))plotWithFormula(dat, y ~ exp(x))Let us compare these models. We will fit them all and extract relevant regression statistics, then compare their AIC scores to perform model selection:
modelComparison <- dat |>
mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
arrange(platform, server, size_kb) |>
mutate(plat_serv = as_factor(str_c(platform, ", ", server))) |>
filter(sha256_match) |>
mutate(x = log10(size_kb), y = log10(time_sec)) |>
select(plat_serv, x, y) |>
crossing(formula = list(
"linear" = formula(y ~ x),
"quad" = formula(y ~ x + I(x^2)),
"quadsimp" = formula(y ~ I(x^2)),
"exp" = formula(y ~ exp(x))
)) |>
mutate(model = names(formula)) |>
nest(data = x | y) |>
mutate(fit = map2(formula, data, lm)) |>
mutate(regression = map(fit, tidy),
quality = map(fit, glance)) |>
separate(plat_serv, into = c("platform", "server"), sep = ", ")We work with AIC because it is usually favored when prediction is the goal (as opposed to inference, for which BIC is more appropriate). Here is a table with the AIC scores:
modelComparison |>
unnest(quality) |>
select(platform | server | model | AIC) |>
pivot_wider(names_from = model, values_from = AIC) |>
kable()| platform | server | linear | quad | quadsimp | exp |
|---|---|---|---|---|---|
| Swarm | Server 2 | 74.65295 | -24.45972 | -14.61966 | -18.30025 |
| Swarm | Server 3 | 99.41558 | 51.70865 | 50.68415 | 66.33704 |
| IPFS | Server 1 | -26.73906 | -108.95065 | -99.92618 | -98.67160 |
| IPFS | Server 2 | -17.85868 | -135.50458 | -88.71638 | -100.76498 |
| IPFS | Server 3 | -293.78867 | -410.59626 | -397.74715 | -240.91414 |
| Arweave | Server 1 | -348.44062 | -474.17289 | -462.64087 | -487.35987 |
| Arweave | Server 2 | -536.61144 | -569.06277 | -568.41676 | -508.01725 |
| Arweave | Server 3 | -304.82322 | -317.63970 | -319.45885 | -302.36202 |
Finding the best models, based on AIC:
modelComparison |>
unnest(quality) |>
select(platform | server | model | AIC) |>
filter(AIC == min(AIC), .by = c(platform, server)) |>
kable()| platform | server | model | AIC |
|---|---|---|---|
| Swarm | Server 2 | quad | -24.45972 |
| Swarm | Server 3 | quadsimp | 50.68415 |
| IPFS | Server 1 | quad | -108.95065 |
| IPFS | Server 2 | quad | -135.50458 |
| IPFS | Server 3 | quad | -410.59626 |
| Arweave | Server 1 | exp | -487.35987 |
| Arweave | Server 2 | quad | -569.06277 |
| Arweave | Server 3 | quadsimp | -319.45885 |
The best model is quad one in most cases. Even in the cases when it isn’t, its AIC score is not far above that of the best model. This suggests adopting this model for prediction across the board. Later on we’ll also look at the predictions made by quadsimp; as we will see, it makes almost the same ones as quad.
Let us check the diagnostic plots for the quadratic model quad:
modelComparison |>
filter(model == "quad") |>
mutate(diagnostics = map(fit, \(x) {
autoplot(x, smooth.colour = NA, alpha = 0.3, colour = "steelblue") +
theme_bw()
} )) |>
mutate(diagnostics = pmap(list(diagnostics, platform, server),
\(dia, pf, sv, m) {
gridExtra::grid.arrange(grobs = dia@plots,
top = str_c(pf, ", ", sv, ", cubic model"))
} )) |>
suppressMessages() |>
capture.output() |>
invisible()Not always amazing, especially the quantile-quantile plot for IPFS, Server 1. That said, here are the regression results:
modelComparison |>
filter(model == "quad") |>
unnest(regression) |>
select(platform, server, term, estimate, std.error, statistic, p.value) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "beta_0",
"x" ~ "beta_1",
"I(x^2)" ~ "beta_2"
)) |>
kable()| platform | server | term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|---|---|
| Swarm | Server 2 | beta_0 | -0.4370973 | 0.0376917 | -11.5966470 | 0.0000000 |
| Swarm | Server 2 | beta_1 | -0.1551407 | 0.0446488 | -3.4746920 | 0.0006724 |
| Swarm | Server 2 | beta_2 | 0.1273017 | 0.0107037 | 11.8932184 | 0.0000000 |
| Swarm | Server 3 | beta_0 | -0.5328200 | 0.0485860 | -10.9665446 | 0.0000000 |
| Swarm | Server 3 | beta_1 | -0.0563647 | 0.0575539 | -0.9793382 | 0.3290219 |
| Swarm | Server 3 | beta_2 | 0.1048560 | 0.0137975 | 7.5996436 | 0.0000000 |
| IPFS | Server 1 | beta_0 | -0.5521749 | 0.0284402 | -19.4153010 | 0.0000000 |
| IPFS | Server 1 | beta_1 | -0.1128020 | 0.0336896 | -3.3482709 | 0.0010330 |
| IPFS | Server 1 | beta_2 | 0.0849805 | 0.0080765 | 10.5219832 | 0.0000000 |
| IPFS | Server 2 | beta_0 | -0.8506275 | 0.0260311 | -32.6774074 | 0.0000000 |
| IPFS | Server 2 | beta_1 | -0.2317909 | 0.0308358 | -7.5169356 | 0.0000000 |
| IPFS | Server 2 | beta_2 | 0.0990083 | 0.0073923 | 13.3933988 | 0.0000000 |
| IPFS | Server 3 | beta_0 | -1.1938889 | 0.0104053 | -114.7382135 | 0.0000000 |
| IPFS | Server 3 | beta_1 | 0.0482081 | 0.0123259 | 3.9111100 | 0.0001399 |
| IPFS | Server 3 | beta_2 | 0.0393752 | 0.0029549 | 13.3253196 | 0.0000000 |
| Arweave | Server 1 | beta_0 | 0.0797464 | 0.0084182 | 9.4730916 | 0.0000000 |
| Arweave | Server 1 | beta_1 | -0.0371489 | 0.0099720 | -3.7253139 | 0.0002775 |
| Arweave | Server 1 | beta_2 | 0.0335930 | 0.0023906 | 14.0520845 | 0.0000000 |
| Arweave | Server 2 | beta_0 | 0.1931221 | 0.0061355 | 31.4760261 | 0.0000000 |
| Arweave | Server 2 | beta_1 | 0.0117555 | 0.0072680 | 1.6174336 | 0.1079286 |
| Arweave | Server 2 | beta_2 | 0.0107342 | 0.0017424 | 6.1606966 | 0.0000000 |
| Arweave | Server 3 | beta_0 | 0.2531398 | 0.0141848 | 17.8458048 | 0.0000000 |
| Arweave | Server 3 | beta_1 | 0.0070761 | 0.0168030 | 0.4211214 | 0.6742815 |
| Arweave | Server 3 | beta_2 | 0.0157366 | 0.0040282 | 3.9065947 | 0.0001423 |
Plotting the estimates, to see if they are consistent across servers:
modelComparison |>
filter(model == "quad") |>
mutate(platform = fct_relevel(platform, "Swarm", "IPFS", "Arweave")) |>
unnest(regression) |>
select(platform, server, term, estimate) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "beta[0]",
"x" ~ "beta[1]",
"I(x^2)" ~ "beta[2]"
)) |>
ggplot(aes(x = term, y = estimate)) +
geom_point(color = "steelblue", alpha = 0.7) +
scale_x_discrete(labels = parse_format()) +
facet_grid(~ platform) +
theme_bw()While there is some spread, maybe it is not a great mistake to take the average of the parameters and treat them as “the” parameter for one platform. Let us compute these averages:
paramTab <- modelComparison |>
filter(model == "quad") |>
unnest(regression) |>
select(platform, server, term, estimate) |>
summarize(estimate = mean(estimate), .by = c(platform, term)) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"x" ~ "b1",
"I(x^2)" ~ "b2"
)) |>
pivot_wider(names_from = term, values_from = estimate)
paramTab |>
rename(`$\\beta_0$` = b0, `$\\beta_1$` = b1, `$\\beta_2$` = b2) |>
kable(escape = FALSE, format = "latex") |>
kable_styling(latex_options = "HOLD_position")So we have the following models:
| Swarm | \(y_i = -0.485 - 0.106 x_i + 0.116 x_i^2\) |
| IPFS | \(y_i = -0.866 - 0.0988 x_i + 0.0745 x_i^2\) |
| Arweave | \(y_i = 0.175 - 0.00611 x_i + 0.02 x_i^2\) |
This predicts that for small file sizes, IPFS is best (smallest \(\beta_0\)), and Swarm is in the middle. In turn, for very large files, Arweave will be best (smallest \(\beta_2\)) and IPFS second best. To see where the curves take over one another, we can plot all three of them:
# s is log10(file size in KB); bi are coeffs; returns log10(download time):
qfun <- function(s, b0, b1, b2) b0 + b1*s + b2*s^2
paramTab |>
mutate(platform = as_factor(platform)) |>
mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, b1, b2))
} )) |>
unnest(curve) |>
mutate(x = 10^x, y = 10^y) |>
ggplot(aes(x = x, y = y, color = platform)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time", color = NULL) +
scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
theme_bw()Incidentally: as advertised above, we can create the same plot using the quadsimp model instead. The predictions do not change appreciably:
modelComparison |>
filter(model == "quadsimp") |>
unnest(regression) |>
select(platform, server, term, estimate) |>
summarize(estimate = mean(estimate), .by = c(platform, term)) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"I(x^2)" ~ "b1"
)) |>
pivot_wider(names_from = term, values_from = estimate) |>
mutate(platform = as_factor(platform)) |>
mutate(curve = pmap(list(b0, b1), \(b0, b1) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, 0, b1))
} )) |>
unnest(curve) |>
mutate(x = 10^x, y = 10^y) |>
ggplot(aes(x = x, y = y, color = platform)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time", color = NULL) +
scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
theme_bw()Plotting the two model predictions together, to show how close they are:
modelComparison |>
filter(model %in% c("quad", "quadsimp")) |>
unnest(regression) |>
select(platform, server, model, term, estimate) |>
summarize(estimate = mean(estimate),
.by = c(platform, term, model)) |>
mutate(term = case_match(
term,
"(Intercept)" ~ "b0",
"x" ~ "b1",
"I(x^2)" ~ "b2"
)) |>
pivot_wider(names_from = term, values_from = estimate,
values_fill = 0) |>
mutate(platform = as_factor(platform)) |>
mutate(curve = pmap(list(b0, b1, b2), \(b0, b1, b2) {
tibble(x = seq(0, 7, l = 101)) |>
mutate(y = qfun(x, b0, b1, b2))
} )) |>
unnest(curve) |>
mutate(x = 10^x, y = 10^y) |>
ggplot(aes(x = x, y = y, color = platform, linetype = model)) +
geom_line(linewidth = 1) +
labs(x = "File size", y = "Predicted download time",
color = "Platform", linetype = "Model") +
scale_x_log10(breaks = 10^c(0, 3, 6), labels = c("1KB", "1MB", "1GB")) +
scale_y_log10(breaks = c(1, 60, 3600), labels = c("1s", "1m", "1h")) +
scale_color_manual(values = c("steelblue", "goldenrod", "forestgreen")) +
theme_bw()Finally, a quick note: it is unlikely that the model is realistic for very large files. for instance, we see that Swarm takes between about 10 minutes (based on quadsimp) and 20 minutes (based on quad) to download 1GB of data. But it is unlikely that downloading 100GB would take 2.44 days (!), as the quadsimp model would predict.